In [758]:
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LassoCV, LinearRegression
from numpy.random import poisson
from dash import Dash, html, dcc, Input, Output, callback, dash_table
import plotly.express as px
import plotly.figure_factory as ff
import dash_mantine_components as dmc
import plotly.graph_objects as go
from plotly.subplots import make_subplots
In [759]:
# if os.environ['CONDA_DEFAULT_ENV'] != 'cleague':
# os.system("conda activate cleague")
# if os.environ['CONDA_DEFAULT_ENV'] != 'cleague':
# os.system("conda env create -f environment.yml")
# os.system("conda activate cleague")
# print(os.environ['CONDA_DEFAULT_ENV'])
In [760]:
# Shell escape: export trial.ipynb to an HTML file with nbconvert's "full" template.
# NOTE(review): this runs every time the cell is executed (including "Run All") — confirm that is intended.
!jupyter nbconvert --to html --template full trial.ipynb
In [761]:
# Location of the player dataset, resolved relative to the current working directory
# (this replaces an earlier machine-specific absolute path).
PATH = os.getcwd()+'/data/Fifa 23 Players Data.csv'
In [762]:
# Load the player table from the CSV at PATH; every later cell reads from (and one reassigns) `df`.
df = pd.read_csv(PATH)
In [763]:
df.head(20)
Out[763]:
In [764]:
# Distinct values of the "Best Position" column; used below as the set of roles to model.
POSITIONS = df["Best Position"].unique()
In [765]:
# Rows whose "Best Position" is CAM — the sample used for the exploratory fit below.
CAM = df.loc[df["Best Position"] == "CAM"]
In [766]:
# Regression target for the exploratory fit: the "CAM Rating" column of the CAM rows.
Y = CAM.loc[:, "CAM Rating"]
In [767]:
# Attribute columns used as regression features.
# NOTE(review): spellings such as 'LongPassing', 'BallControl' and ' GoalkeeperKicking'
# (leading space) presumably mirror the CSV header exactly — confirm before normalising them.
FEATURE_NAMES = [
    'Crossing', 'Finishing', 'Heading Accuracy',
    'Short Passing', 'Volleys', 'Dribbling',
    'Curve', 'Freekick Accuracy', 'LongPassing',
    'BallControl', 'Acceleration', 'Sprint Speed',
    'Agility', 'Reactions', 'Balance',
    'Shot Power', 'Jumping', 'Stamina',
    'Strength', 'Long Shots', 'Aggression',
    'Interceptions', 'Positioning', 'Vision',
    'Penalties', 'Composure', 'Marking',
    'Standing Tackle', 'Sliding Tackle', 'Goalkeeper Diving',
    'Goalkeeper Handling', ' GoalkeeperKicking', 'Goalkeeper Positioning',
    'Goalkeeper Reflexes',
]
In [768]:
# Position code -> human-readable role name (used for plot titles).
# The misspelt constant name is kept because other cells reference it as-is.
POISITION_CODE = {
    "CF":  "Center Forward",
    "ST":  "Striker",
    "RW":  "Right Winger",
    "LW":  "Left Winger",
    "GK":  "Goalkeeper",
    "CB":  "Center Back",
    "CDM": "Central Defensive Midfielder",
    "LB":  "Left Back",
    "RB":  "Right Back",
    "LWB": "Left Wing Back",
    "RWB": "Right Wing Back",
    "CAM": "Central Attacking Midfielder",
    "CM":  "Central Midfielder",
    "LM":  "Left Midfielder",
    "RM":  "Right Midfielder",
}
In [769]:
# Feature matrix for the exploratory fit: the attribute columns of the CAM rows.
X = CAM.loc[:, FEATURE_NAMES]
In [770]:
# Cross-validated Lasso (5 folds, iteration cap raised to 5000) of CAM rating on the attributes.
model = LassoCV(cv=5, max_iter=5000)
clf = model.fit(X=X, y=Y)
In [771]:
# Bar chart of the absolute Lasso coefficients for CAM, with a dashed reference line at 0.025.
importance = pd.Series(clf.coef_)
plt.figure(figsize=(20, 10))
plt.bar(x=FEATURE_NAMES, height=importance.abs())
plt.xticks(rotation=75)
plt.axhline(y=0.025, color='k', linestyle='--')
plt.title(
    "Feature Importance of {}".format(POISITION_CODE["CAM"]),
    fontdict=dict(fontsize=15),
)
plt.show()
In [772]:
# np.sum(importance)
In [773]:
# model2 = LinearRegression()
# clf2 = model2.fit(X, Y)
In [774]:
# clf2.coef_
In [775]:
def calculateImportance(positionCode: str, cv: int = 5, max_iter: int = 5000):
    """Fit a cross-validated Lasso for one position and return absolute coefficients.

    The model regresses the ``"<positionCode> Rating"`` column on ``FEATURE_NAMES``
    using only the players whose "Best Position" equals ``positionCode``.

    Parameters:
        positionCode: position code, e.g. "CAM".
        cv: number of cross-validation folds passed to ``LassoCV``.
        max_iter: iteration cap passed to ``LassoCV``. The default matches the
            exploratory CAM fit earlier in the notebook, so every role is fitted
            with the same settings.

    Returns:
        list of absolute coefficients, one per entry of ``FEATURE_NAMES``, in order.

    Raises:
        KeyError: if the rating column for ``positionCode`` does not exist.
        ValueError: if no player has ``positionCode`` as best position.
    """
    players = df[df["Best Position"] == positionCode]
    ratings = players[positionCode + ' Rating']
    attributes = players[FEATURE_NAMES]
    if players.empty:
        # Fail with a readable message instead of an estimator-level complaint about empty input.
        raise ValueError("No players found with Best Position {!r}".format(positionCode))
    fitted = LassoCV(cv=cv, max_iter=max_iter).fit(attributes, ratings)
    return np.abs(fitted.coef_).tolist()
In [776]:
# calculateImportance("LW")
In [777]:
# One row per role, one column per attribute, holding the absolute Lasso coefficients.
# Built in a single constructor call with an explicit float dtype: filling an empty
# frame row by row leaves every column as `object`, which the later comparisons and
# sums then have to operate on.
role_weights = pd.DataFrame(
    [calculateImportance(role) for role in POSITIONS],
    index=POSITIONS,
    columns=FEATURE_NAMES,
    dtype=float,
)
In [778]:
# role_weights.loc["CAM", :]
In [779]:
role_weights
Out[779]:
In [780]:
# Zero out every weight below 0.01 so negligible attributes do not contribute.
role_weights_filter = role_weights.mask(role_weights.lt(0.01), other=0)
In [781]:
# Total filtered weight per role (row sums).
role_sum = role_weights_filter.sum(axis=1)
In [782]:
# Total filtered weight per attribute across all roles (column sums).
traits_sum = role_weights_filter.sum(axis=0)
In [783]:
# Attributes whose filtered weight is zero for every role carry no information: remove
# them from the player table, the weight table and the feature list.
redundant_traits = traits_sum[traits_sum == 0]
redundant_columns = redundant_traits.index.to_list()
# errors="ignore" keeps this cell re-runnable: on a second execution the columns are
# already gone and a plain drop would raise KeyError.
df = df.drop(columns=redundant_columns, errors="ignore")
role_weights_filter = role_weights_filter.drop(columns=redundant_columns, errors="ignore")
FEATURE_NAMES = [name for name in FEATURE_NAMES if name not in redundant_columns]
In [784]:
# redundant_traits.index.any()
In [785]:
# FEATURE_NAMES
Normalising Weights¶
In [786]:
# Divide each role's row by that role's total so the weights of a role sum to 1.
# NOTE(review): a role whose filtered weights are all zero would divide 0 by 0 here.
ROLE_WEIGHTS_NORM = role_weights_filter.div(role_sum, axis="index")
In [787]:
# Same chart as before, now using the normalised CAM weights.
importance = ROLE_WEIGHTS_NORM.loc["CAM"]
plt.figure(figsize=(20, 10))
plt.bar(x=FEATURE_NAMES, height=np.abs(importance))
plt.xticks(rotation=75)
plt.axhline(y=0.025, color='k', linestyle='--')
plt.title(
    "Feature Importance of {}".format(POISITION_CODE["CAM"]),
    fontdict=dict(fontsize=15),
)
plt.show()
In [788]:
# role_sum
In [789]:
# Interactive bar chart of the total (pre-normalisation) attribute weight per role.
fig = px.bar(x=role_sum.index, y=role_sum, text_auto=True)
fig.update_layout(
    title='Bar Chart showing sum of Attribute Weights by Role',
    xaxis_title='Roles',
    yaxis_title='Total Weight',
    width=1000,
    height=500,
    margin={"r": 10, "b": 10, "l": 10, "t": 30},
)
fig.show(renderer='notebook')
In [790]:
# Row sums of the normalised weights, computed to check the normalisation.
role_sum_norm = ROLE_WEIGHTS_NORM.sum(axis=1)
In [791]:
# Interactive bar chart of the per-role totals after normalisation.
fig = px.bar(x=role_sum_norm.index, y=role_sum_norm, text_auto=True)
fig.update_layout(
    title='Bar Chart showing sum of Attribute Weights by Role after Normalisation',
    xaxis_title='Role',
    yaxis_title='Total Weight',
    width=1000,
    height=500,
    margin={"r": 10, "b": 10, "l": 10, "t": 30},
)
fig.show(renderer='notebook')
In [792]:
def visualiseFeature(positionCode: str):
    """Save a bar chart of the normalised attribute weights for one position.

    The chart is written to ``<cwd>/figures/<positionCode>_featureimportance.png``
    and is not displayed inline.

    Parameters:
        positionCode: position code; must be a row label of ``ROLE_WEIGHTS_NORM``
            and a key of ``POISITION_CODE``.
    """
    weight = ROLE_WEIGHTS_NORM.loc[positionCode]
    figures_dir = os.getcwd() + "/figures"
    # savefig does not create missing directories; make sure the target exists.
    os.makedirs(figures_dir, exist_ok=True)
    fig = plt.figure(figsize=(15, 10))
    try:
        plt.bar(x=FEATURE_NAMES, height=weight)
        plt.xticks(rotation=75)
        plt.axhline(y=0.01, color='k', linestyle='--')
        plt.title("Feature Importance of {}".format(POISITION_CODE[positionCode]), fontdict={'fontsize': 15})
        plt.savefig(figures_dir + "/{}_featureimportance.png".format(positionCode))
    finally:
        # Always release the figure, even if plotting or saving fails, so figures
        # do not pile up when this is called in a loop.
        plt.close(fig)
In [793]:
# Write one feature-importance chart per role found in the data.
for position in POSITIONS:
    visualiseFeature(position)
Teams Creator¶
In [794]:
# Distinct club names as a plain list; feeds the team dropdowns in the Dash app.
# NOTE(review): if any row has a missing club, the list would contain NaN — confirm the dropdown tolerates that.
ALL_TEAMS = df["Club Name"].unique().tolist()
In [795]:
def getPlayersFromTeam(teamName: str):
    """Return the rows of ``df`` whose "Club Name" equals ``teamName``."""
    return df.loc[df["Club Name"] == teamName]
In [796]:
# getPlayersFromTeam("Chelsea")
In [797]:
def getPlayersFromPosition(teamName: str, positionCode: str):
    """Return a club's players who can play a given position.

    A player qualifies when ``positionCode`` is their "Best Position" or appears
    as a whole code in their "Positions Played" text.

    Parameters:
        teamName: value of the "Club Name" column to select.
        positionCode: position code, e.g. "LW".

    Returns:
        DataFrame with the matching rows of the club's players.
    """
    import re  # local import: the notebook's import cell does not load `re`

    players = getPlayersFromTeam(teamName)
    # Match the code as a whole word. A plain substring search would report "LW" for
    # a player who only plays "LWB" (and "RW" for "RWB"). The code is escaped so it
    # is never interpreted as a regular expression.
    whole_code = r"\b{}\b".format(re.escape(positionCode))
    # na=False: a missing "Positions Played" entry counts as "does not play here"
    # instead of putting NaN into the boolean mask.
    plays_position = players["Positions Played"].str.contains(whole_code, regex=True, na=False)
    is_best_position = players["Best Position"] == positionCode
    return players[is_best_position | plays_position]
In [798]:
# getPlayersFromPosition("Paris Saint-Germain", "CAM")
In [799]:
# df["Positions Played"].str.contains("CAM")
In [800]:
# df["Best Position"]=="CAM"
Team Creator Application¶
In [801]:
# Initialize the app once; Mantine theming is applied through MantineProvider below.
app = Dash(__name__)


def _teamSelectColumn(label, placeholder, selectId, valueId):
    """Build one grid column: a searchable team dropdown plus the text node its callback fills."""
    return dmc.Col([
        dmc.Select(
            label=label,
            placeholder=placeholder,
            id=selectId,
            searchable=True,
            clearable=True,
            data=ALL_TEAMS,
            style={"width": 'auto', "marginBottom": 10},
        ),
        # Output target of the matching callback. The callbacks below write to this id,
        # so it has to exist in the layout.
        dmc.Text(id=valueId),
    ], span='auto')


# App layout
app.layout = dmc.MantineProvider(
    theme={"colorScheme": "dark"},
    children=dmc.Container([
        dmc.Title('Match-up Creator', color="blue", size="h2", align='center'),
        dmc.Stack([
            dmc.Grid([
                _teamSelectColumn("Select Home Team", "Home Team", "hometeam-select", "hometeam-value"),
                _teamSelectColumn("Select Away Team", "Away Team", "awayteam-select", "awayteam-value"),
            ], align='center'),
        ]),
    ], fluid=True))


# Add controls to build the interaction.
# Each callback echoes the selected team into the text node under its dropdown.
# The two functions have distinct names so the second no longer shadows the first.
@callback(Output("hometeam-value", "children"), Input("hometeam-select", "value"))
def select_home_value(value):
    return value


@callback(Output("awayteam-value", "children"), Input("awayteam-select", "value"))
def select_away_value(value):
    return value


app.run(debug=True)
Goals Score Probability Calculator¶
In [802]:
# Per-position contribution table, indexed by the CSV's first (unnamed) column.
POSITION_CONTRIBUTION = (
    pd.read_csv(os.getcwd()+'/data/position_contribution.csv')
    .set_index('Unnamed: 0')
)
In [803]:
# POSITION_CONTRIBUTION.loc["LW"]
In [804]:
getPlayersFromTeam("Arsenal")
Out[804]:
In [805]:
# Sample line-ups. Each dict maps a player's full name to the position code
# of the role they are assigned in that line-up.
SAMPLE_TEAM_CHELSEA = {
    "Édouard Mendy": "GK",
    "Kalidou Koulibaly": "CB",
    "Thiago Emiliano da Silva": "CB",
    "Reece James": "RWB",
    "Ben Chilwell": "LWB",
    "N'Golo Kanté": "CDM",
    "Luiz Frello Filho Jorge": "CM",
    "Mateo Kovačić": "CM",
    "Raheem Sterling": "RW",
    "Christian Pulisic": "LW",
    "Pierre-Emerick Aubameyang": "ST",
}

SAMPLE_TEAM_ARSENAL = {
    "Aaron Ramsdale": "GK",
    "Gabriel dos S. Magalhães": "CB",
    "William Saliba": "CB",
    "Takehiro Tomiyasu": "CB",
    "Kieran Tierney": "LWB",
    "Thomas Partey": "CDM",
    "Martin Ødegaard": "CAM",
    "Bukayo Saka": "LM",
    "Emile Smith Rowe": "CAM",
    "Oleksandr Zinchenko": "CM",
    "Gabriel Fernando de Jesus": "ST",
}
In [806]:
def calculatePlayerScore(player: str, role: str):
    """Calculate a player's attack and defense scores for a given role.

    The player's attributes are weighted by the normalised weights of the role
    he is playing in; the resulting score is then scaled by the role's
    "Attack" and "Defense" contribution.

    Parameters:
        player: player's full name, matched against the "Full Name" column.
        role: position code of the role.

    Returns:
        attack_score: total attack score of the player.
        defense_score: total defense score of the player.
        player_attributes_weighted: list of the player's weighted attributes,
            in ``FEATURE_NAMES`` order.

    Raises:
        IndexError: if no row has ``player`` as "Full Name".
        KeyError: if ``role`` is missing from the weight or contribution table.
    """
    matches = df.loc[df["Full Name"] == player, FEATURE_NAMES]
    if matches.empty:
        # Same exception type the bare ``.iloc[0]`` raised before, but with a message
        # that names the player who could not be found.
        raise IndexError("No player named {!r} found in the player table".format(player))
    # When several rows share the name, the first one is scored (as before).
    attributes = matches.iloc[0]
    # Select the weights by feature name so attributes and weights line up label for label.
    weight = ROLE_WEIGHTS_NORM.loc[role, FEATURE_NAMES]
    player_attributes_weighted = (attributes * weight).to_list()
    player_score = np.dot(attributes, weight)
    role_contribution = POSITION_CONTRIBUTION.loc[role]
    attack_score = player_score * role_contribution.loc["Attack"]
    defense_score = player_score * role_contribution.loc["Defense"]
    return attack_score, defense_score, player_attributes_weighted
In [807]:
# type(calculatePlayerScore( "Gabriel dos S. Magalhães","CB")[2])
In [808]:
def calculateTeamScore(team: dict):
    """Score every player of a line-up and collect the results in one table.

    Parameters:
        team: mapping of player full name -> position code of the assigned role.

    Returns:
        DataFrame with one row per player and the columns "Position", "Full Name",
        "Total Score", "Attack Score", "Defense Score", followed by one weighted
        attribute column per entry of ``FEATURE_NAMES``.
    """
    columns = ["Position", "Full Name", "Attack Score", "Defense Score"] + FEATURE_NAMES
    # Collect plain rows first and build the frame once; growing a DataFrame one
    # ``.loc`` row at a time re-allocates on every insert and starts from untyped columns.
    rows = []
    for player, role in team.items():
        attack_indiv, defense_indiv, attributes_indiv = calculatePlayerScore(player, role)
        rows.append([role, player, attack_indiv, defense_indiv] + attributes_indiv)
    team_performance = pd.DataFrame(rows, columns=columns)
    team_performance.insert(
        2, "Total Score", team_performance["Attack Score"] + team_performance["Defense Score"]
    )
    return team_performance
In [809]:
# Score both sample line-ups (Chelsea first, then Arsenal).
CHELSEA, ARSENAL = map(calculateTeamScore, (SAMPLE_TEAM_CHELSEA, SAMPLE_TEAM_ARSENAL))
In [810]:
ARSENAL
Out[810]:
In [811]:
# Icicle chart: Total Score -> Attack/Defense -> player, sized by each player's score.
fig = px.icicle(
    ARSENAL.melt(id_vars=['Full Name'], value_vars=['Attack Score', 'Defense Score']),
    path=[px.Constant("Total Score"), 'variable', 'Full Name'],
    values='value',
    width=800,
    height=800,
)
fig.update_traces(root_color="lightgrey")
fig.update_layout(
    margin={"t": 50, "l": 25, "r": 25, "b": 25},
    title="Chart showing Arsenal Players' Contribution to Attack and Defense",
)
fig.show(renderer='notebook')
In [812]:
# Sunburst chart: Total Score -> position -> player -> weighted attribute.
fig = px.sunburst(
    ARSENAL.melt(id_vars=['Position', 'Full Name'], value_vars=FEATURE_NAMES),
    path=[px.Constant("Total Score"), 'Position', 'Full Name', 'variable'],
    values='value',
    width=800,
    height=800,
)
fig.update_traces(root_color="lightgrey")
fig.update_layout(
    margin={"t": 50, "l": 25, "r": 25, "b": 25},
    title="Sunburst Chart showing Arsenal Players' Attributes",
)
fig.show(renderer='notebook')
In [813]:
# Team-level totals: sum of the players' attack and defense scores.
chelsea_attack = CHELSEA["Attack Score"].sum()
chelsea_defense = CHELSEA["Defense Score"].sum()
arsenal_attack = ARSENAL["Attack Score"].sum()
arsenal_defense = ARSENAL["Defense Score"].sum()
In [814]:
# Raw (unscaled) scoring strength: a team's attack total minus the opponent's defense total.
arsenal_lambda = arsenal_attack - chelsea_defense
# Fixed: this previously subtracted arsenal_attack, comparing attack with attack.
chelsea_lambda = chelsea_attack - arsenal_defense
In [815]:
arsenal_lambda
Out[815]:
In [816]:
chelsea_lambda
Out[816]:
In [817]:
# Range assumed for the raw attack-minus-defense difference, and the value that
# the top of that range is mapped to.
MAX = 400
MIN = -400
SCALE_FACTOR = 5
# Multipliers applied to the away and home team's totals respectively.
AWAY_FACTOR = 0.95
HOME_FACTOR = 1.05


def calculateLambda(hometeam:pd.DataFrame, awayteam:pd.DataFrame):
    """Turn two team score tables into a pair of goal rates.

    Each team's "Attack Score" and "Defense Score" columns are summed and
    multiplied by HOME_FACTOR or AWAY_FACTOR. A team's raw strength is its
    attack total minus the opponent's defense total; that value is rescaled as
    ``(raw - MIN) / (MAX - MIN) * SCALE_FACTOR``.

    NOTE: nothing is clamped, so a raw value outside [MIN, MAX] yields a rate
    outside [0, SCALE_FACTOR].

    Returns:
        (home_lambda, away_lambda)
    """
    def _rescale(raw):
        return (raw-MIN) / (MAX-MIN) * SCALE_FACTOR

    totals = {}
    for side, team, factor in (("home", hometeam, HOME_FACTOR), ("away", awayteam, AWAY_FACTOR)):
        totals[side, "attack"] = np.sum(team["Attack Score"]) * factor
        totals[side, "defense"] = np.sum(team["Defense Score"]) * factor

    home_lambda = _rescale(totals["home", "attack"] - totals["away", "defense"])
    away_lambda = _rescale(totals["away", "attack"] - totals["home", "defense"])
    return home_lambda, away_lambda
In [818]:
# Goal rates for the fixture, with Chelsea as the home side and Arsenal away.
CHELSEA_LAMBDA, ARSENAL_LAMBDA = calculateLambda(hometeam=CHELSEA, awayteam=ARSENAL)
In [819]:
# x = np.random.poisson(1.1777696411145124, 10000)
# y = np.random.poisson(0.39078506070956487, 10000)
#
# np.sum(x > y)
In [820]:
def calculateMatchOutcome(homelambda, awaylambda, iterations, seed=None):
    """Simulate a fixture by drawing independent Poisson goal counts for each side.

    Parameters:
        homelambda: Poisson rate for the home team's goals.
        awaylambda: Poisson rate for the away team's goals.
        iterations: number of simulated matches.
        seed: optional seed. When given, a dedicated generator is used so the
            simulation is reproducible; when omitted, the module-level ``poisson``
            is used exactly as before.

    Returns:
        (scores, (homeWin, draw, awayWin)), where ``scores`` is an
        ``(iterations, 2)`` array of [home goals, away goals] and the tuple holds
        the number of simulated home wins, draws and away wins.
    """
    if seed is None:
        draw_goals = poisson
    else:
        draw_goals = np.random.default_rng(seed).poisson
    home_goal = draw_goals(homelambda, iterations)
    away_goal = draw_goals(awaylambda, iterations)
    homeWin = np.sum(home_goal > away_goal)
    draw = np.sum(home_goal == away_goal)
    awayWin = np.sum(away_goal > home_goal)
    return np.array((home_goal, away_goal)).T, (homeWin, draw, awayWin)
In [821]:
# Simulate the Chelsea (home) v Arsenal (away) fixture 20000 times.
CHELSEA_ARSENAL = calculateMatchOutcome(
    homelambda=CHELSEA_LAMBDA,
    awaylambda=ARSENAL_LAMBDA,
    iterations=20000,
)
In [822]:
# One row per simulated match, one column per team; plotted as overlaid goal-count
# histograms with a box plot in the margin.
SCORE_COUNT = pd.DataFrame(CHELSEA_ARSENAL[0], columns=['Chelsea', 'Arsenal'])
fig = px.histogram(
    SCORE_COUNT,
    marginal="box",
    width=800,
    height=800,
    text_auto=True,
)
fig.update_layout(
    margin={"t": 50, "l": 25, "r": 25, "b": 25},
    title="Histogram showing counts of Number of Goals Scored",
)
fig.show(renderer='notebook')
Odds Calculation¶
In [823]:
def winProbability(countOutcome:tuple):
    """Convert outcome counts into relative frequencies.

    Each count is divided by the sum of all counts; the result is returned as a
    tuple in the same order as the input.
    """
    total = sum(countOutcome)
    shares = []
    for count in countOutcome:
        shares.append(count / total)
    return tuple(shares)
In [824]:
# Element 1 of the simulation result is the (home win, draw, away win) count tuple;
# convert it to relative frequencies.
MATCH_PROBABILITIES = winProbability(CHELSEA_ARSENAL[1])
In [825]:
# Bar chart of the simulated outcome frequencies, one colour per outcome.
fig = px.bar(
    x=['Chelsea Win', 'Draw', 'Arsenal Win'],
    y=MATCH_PROBABILITIES,
    color=['Chelsea Win', 'Draw', 'Arsenal Win'],
    color_discrete_sequence=['blue', 'gray', 'red'],
    text_auto=True,
)
fig.update_layout(
    title='Bar Chart showing Probability Distribution of Outcomes',
    xaxis_title='Outcomes',
    yaxis_title='Probability',
    width=1000,
    height=700,
    margin={"r": 10, "b": 10, "l": 10, "t": 30},
)
fig.show(renderer='notebook')
In [826]:
def scoreProbability(scoretable:np.ndarray):
    """Tabulate how often each exact scoreline occurs in a set of simulated matches.

    Parameters:
        scoretable: two-column table of [home goals, away goals], one row per match.

    Returns:
        DataFrame with one row per distinct scoreline and the columns
        "Home", "Away", "Count" (number of matches with that scoreline) and
        "Probability" (Count divided by the total number of matches).
    """
    scoretable_df = (
        pd.DataFrame(scoretable, columns=['Home', 'Away'])
        .groupby(['Home', 'Away'])
        .size()
        .reset_index(name='Count')
    )
    total = scoretable_df['Count'].sum()
    # Guard the empty case so an empty simulation yields an empty table, not a 0/0.
    scoretable_df['Probability'] = scoretable_df['Count'] / total if total else 0.0
    # The table was previously built and then discarded; return it to the caller.
    return scoretable_df
In [827]:
# Scratch table: number of simulated matches per exact (Home, Away) scoreline.
x = (
    pd.DataFrame(CHELSEA_ARSENAL[0], columns=['Home', 'Away'])
    .groupby(['Home', 'Away'])
    .size()
    .reset_index(name='Count')
)
In [828]:
def calculateRawOdds(probability:tuple):
    """Return the reciprocal of each probability, as a tuple in the same order."""
    odds = []
    for prob in probability:
        odds.append(1/prob)
    return tuple(odds)
In [829]:
# Outcome frequencies from a fresh 20000-match simulation, and their reciprocal ("raw") odds.
# NOTE(review): the two rates are hard-coded literals rather than CHELSEA_LAMBDA / ARSENAL_LAMBDA —
# presumably pasted from an earlier run; confirm they are still the intended inputs.
RAW_PROBABILITY = winProbability(calculateMatchOutcome(1.7291590385448554, 0.9913609944152171, 20000)[1])
RAW_ODDS = calculateRawOdds(RAW_PROBABILITY)
In [830]:
RAW_ODDS
Out[830]:
In [831]:
RAW_PROBABILITY
Out[831]:
In [832]:
# Offered odds used in the profit analysis below, passed to expectedProfit in the same
# order as the bet shares [home, draw, away] (the closing text calls them arbitrary).
MARGIN_ODDS = (1.8, 4, 4.5)
In [833]:
def expectedProfit(odds, betpercentage, probability):
    """Expected profit for a set of odds, bet shares and outcome probabilities.

    Computes the sum over outcomes i of
    ``(1 - odds[i] * betpercentage[i]) * probability[i]``.
    The three sequences are indexed in parallel, over the length of ``odds``.
    """
    profit = 0
    for idx, price in enumerate(odds):
        profit += (1-price*betpercentage[idx])*probability[idx]
    return profit
In [834]:
def setMargin(rawodds, margin, probability):
    """Placeholder: ignores all of its arguments and always returns 1."""
    return 1
In [835]:
# Grid of betting splits: `home` is the share of all bets placed on the home win,
# `rest` is what is left for draw + away.
home = np.linspace(0, 1, 100)
rest = np.ones(100) - home
# Fraction of `rest` that goes to the draw (0.0, 0.1, ..., 1.0); the remainder goes to away.
DRAW_DIST = np.linspace(0, 1, 11).round(1)
AWAY_DIST = 1 - DRAW_DIST

# Build one frame per draw/away split and concatenate once at the end. Concatenating
# inside the loop re-copies the accumulated table on every pass, and starting from an
# empty frame relies on pandas behaviour that is deprecated.
profit_frames = []
for draw_share, away_share in zip(DRAW_DIST, AWAY_DIST):
    draw = rest * draw_share
    away = rest * away_share
    profit = [
        expectedProfit(MARGIN_ODDS, [home_bet, draw_bet, away_bet], RAW_PROBABILITY)
        for home_bet, draw_bet, away_bet in zip(home.tolist(), draw.tolist(), away.tolist())
    ]
    profit_frames.append(pd.DataFrame({
        'Proportion Draw': np.ones(100) * draw_share,
        'Home': home,
        'Away+Draw': rest,
        'Profit': profit,
    }))
profitTable = pd.concat(profit_frames, axis=0)
In [836]:
# 3-D line plot: one line per draw proportion, expected profit against the home and
# draw+away bet shares.
fig = px.line_3d(
    data_frame=profitTable,
    x='Home',
    y='Away+Draw',
    z='Profit',
    color='Proportion Draw',
)
fig.update_layout(
    title='Graph showing the effect of Bet Percentage on Expected Profit',
    scene={
        "xaxis_title": 'Home Bet Size %',
        "yaxis_title": 'Draw + Away Bet Size %',
        "zaxis_title": 'Expected Profit',
    },
    width=1000,
    height=700,
    margin={"r": 10, "b": 10, "l": 10, "t": 30},
)
fig.show(renderer='notebook')
This line graph shows that betting behaviour — that is, the percentage of punters who bet on each outcome — changes the expected profit from this match.
Each line plots expected profit as a function of the Home bet size as a percentage of total bets; the different lines show the expected-profit curve when the remaining (Away and Draw) percentage is split differently between the two.
As seen from the graph above, given arbitrary odds of Home=1.8, Draw=4, and Away=4.5, the highest expected-profit curve occurs when all of the pot other than the Home bets is placed on Draw (Navy Line).